* ------------------------------------------------------------------------------
* Setup: reset Stata, fix the random-number seed, define the project paths,
* load the input data (inv_loc.dta) and open the log file for this do-file.
* ------------------------------------------------------------------------------
clear all 
set more off
set seed 20082024

* NOTE(review): absolute, user-specific Windows paths - adjust before running
* on another machine. TABLE_PATH is defined here but is not referenced in the
* lines of this file that are visible to the reviewer.
global TABLE_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\Tables"
global MY_IN_PATH   "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"
global MY_OUT_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"

* Load the working dataset, replacing anything in memory.
use "${MY_IN_PATH}\inv_loc.dta", clear   

* Close any log left open by a previous run (no error if none is open),
* then start a fresh log, overwriting an existing file of the same name.
capture log close 
log using "${MY_OUT_PATH}\BFSV_T4.log", replace

* Complement indicators: cXX_old equals -(cXX_new - 1) for XX in {50, 75}.
foreach q in 50 75 {
    generate c`q'_old = -(c`q'_new - 1)
}

* Sum every indicator within fips_state x gvkey x year cells; the result is
* stored as n<indicator>fs (e.g. nc50_oldfs).
* NOTE(review): this grouping uses fips_state, whereas the ids and merges
* further down use state_fips - confirm both variables exist and agree.
foreach ind in c50_old c75_old c50_new c75_new {
    gegen n`ind'fs = sum(`ind'), by(fips_state gvkey year)
}

* Consecutive numeric ids for firms (gvkey) and states (state_fips).
gegen fid  = group(gvkey)
gegen stid = group(state_fips)

					
** panel fill up
* Keep one row per firm x state x year and declare a firm-state panel.
duplicates drop fid stid year, force
egen fsid = group(fid stid)
tsset fsid year

* Add rows for the gaps in each panel's year sequence, then re-declare.
tsfill
tsset fsid year

* Rows added by tsfill carry missing counts: set them to zero.
foreach cnt in nc50_oldfs nc50_newfs {
    replace `cnt' = 0 if `cnt' == .
}

* Carry the identifiers forward from the previous year of the same panel.
* NOTE(review): gvkey, state and fips_state are NOT carried forward, so rows
* added by tsfill keep missing values there - confirm this is intended given
* the later merge on gvkey year.
foreach idv in state_fips stid fid {
    bysort fsid (year): replace `idv' = `idv'[_n-1] if `idv' == . & fsid == fsid[_n-1]
}

* Attach the state-year file wilson_details_sfips.dta; keep matched rows only.
merge m:1 state_fips year using "${MY_IN_PATH}\wilson_details_sfips.dta"
keep if _merge == 3
drop _merge


* first tax exposure and treatment
* _first_tax: earliest year with a positive, non-missing rd_efr_hi within each
* fips group; rows that fail the condition get missing, recoded to 0.
* first_tax then spreads the group maximum to every row of the group, so it
* is the first such year, or 0 when the group never has a positive value.
egen _first_tax = min(year) if rd_efr_hi>0 & rd_efr_hi<., by(fips)
recode _first_tax (.=0)

egen first_tax = max(_first_tax), by(fips)

* Guard against duplicate firm x state x year rows after the merge.
duplicates drop fid stid year, force

* First year observed for each firm-state pair.
gegen fy = min(year), by(fid stid)

* Treatment dummy: positive lagged rate.
* Fix: Stata treats missing as larger than any number, so the bare comparison
* l1rd_efr_hi > 0 coded rows with a missing lag as treated. The extra
* condition mirrors the guard already used for _first_tax above.
gen dtreat = l1rd_efr_hi > 0 & l1rd_efr_hi < .

* Firm-year aggregates: number of treated rows, number of rows, and the
* largest lagged rate; evtreat is the sum of the lagged rate by state.
gegen ntreat = sum(dtreat), by(fid year)
gegen nfy = count(1), by(fid year)
gegen max = max(l1rd_efr_hi), by(fid year)
gegen evtreat = sum(l1rd_efr_hi), by(state_fips)


** Merge CS data
* Attach the firm-year file cs_sample_all_patenting.dta; keep matched rows only.
merge m:1 gvkey year using "${MY_IN_PATH}\cs_sample_all_patenting.dta"
keep if _merge == 3
drop _merge

* Drop firm-state pairs with fewer than two rows, then keep years 1977-2006.
bysort fid stid: drop if _N < 2
keep if inrange(year, 1977, 2006)

* Total count and a detailed summary of its two components.
generate npat = nc50_oldfs + nc50_newfs
summarize nc50_oldfs nc50_newfs, detail

* Cap the old-type count at 500 (left missing where the input is missing),
* and rebuild the total from the capped component.
generate nc50_oldfs_500 = min(500, nc50_oldfs) if nc50_oldfs != .
generate npat_500 = nc50_newfs + nc50_oldfs_500

********************************************************************************
* descriptives: mean and sd of the rate and the three outcomes, restricted to
* rows with first_tax<=1991
tabstat rd_efr_hi npat_500 nc50_oldfs_500 nc50_newfs if first_tax<=1991, ///
    statistics(mean sd) columns(statistics) format(%9.3f)
********************************************************************************

********************************************************************************
* Regressions for Table 4:
* One PPML regression per outcome on the lagged rate, absorbing
* state-by-firm and year effects, clustered on stid. Estimates are stored as
* a1, b1, c1 and the pseudo R2 is kept in scalar ra_<outcome>.
local store_as a1 b1 c1
local j = 0
foreach depvar in npat_500 nc50_oldfs_500 nc50_newfs {
    local ++j
    local est : word `j' of `store_as'
    quietly ppmlhdfe `depvar' l1rd_efr_hi if first_tax<=1991, ///
        absorb(stid#fid year) cluster(stid) separation(ir)
    estimates store `est'
    scalar ra_`depvar' = e(r2_p)
}

* define cohorts and treatments
* first_taxB copies first_tax but is set to 1990 for five listed states; the
* global cohort2 holds the NAME of that cohort variable.
global cohort2 first_taxB
capture drop first_taxB
generate first_taxB = first_tax
replace first_taxB = 1990 if inlist(state, "WI", "IN", "IA", "CA", "ND")

* Event window: 5 leads and 15 lags around the cohort year.
global first 5
global last  15

* define relative time indicators
* ry is the year relative to the cohort year.
capture drop ry
generate ry = year - $cohort2

* Lead dummies g_1 ... g_5 (ry equal to -1 ... -5), labelled with the offset.
forvalues lead = 1/$first {
    capture drop g_`lead'
    generate g_`lead' = (ry == -`lead')
    label variable g_`lead' "-`lead'"
}

* Lag dummies g0 ... g15 (ry equal to 0 ... 15).
forvalues lag = 0/$last {
    capture drop g`lag'
    generate g`lag' = (ry == `lag')
    label variable g`lag' "`lag'"
}

* Build two lists over lags 1..15: the cohort-by-lag interaction terms used
* as regressors (varlistpostl) and the bare dummy names (vpostl).
global varlistpostl
global vpostl
forvalues lag = 1/$last {
    global varlistpostl $varlistpostl i.$cohort2#c.g`lag'
    global vpostl $vpostl g`lag'
}


* calculate overall time-averaged treatment effect
* For each outcome: estimate the cohort-by-lag PPML model, then combine the
* interaction coefficients into one number, weighting each coefficient by its
* cell count divided by the number of post-cohort rows in the estimation
* sample. nlcom posts the result as "att"; it is stored as e1_<outcome>.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly ppmlhdfe `y' $varlistpostl ///
        if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
        absorb(stid#fid year) vce(cluster stid) keepsingle separation(ir)

    scalar rb_`y' = e(r2_p)
    capture drop wsample
    generate wsample = e(sample)

    quietly {
        * Denominator: post-cohort rows of ever-treated cohorts in the sample.
        count if wsample==1 & year>$cohort2 & $cohort2!=0
        local n_post = r(N)

        * Cohort values present among those rows (same list for every lag).
        levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(cohorts)

        * Build the weighted sum as text; it is only evaluated by nlcom below.
        local att_expr 0
        foreach g in $vpostl {
            foreach c of local cohorts {
                count if wsample==1 & `g'==1 & $cohort2==`c'
                local n_cell = r(N)
                local att_expr `att_expr' + _b[`c'.$cohort2#c.`g']*`n_cell' /`n_post'
            }
        }
    }
    nlcom (att: `att_expr'), post
    estimates store e1_`y'
}


********************************************************************************
* TABLE 4, PANEL A
* Prints the stored rate regressions a1 b1 c1 and, for each outcome, the
* number of observations, firm-state pairs and firms, plus the pseudo R2
* saved in scalar ra_<outcome>.
********************************************************************************

esttab  a1 b1 c1 , replace se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
					keep(l1rd_efr_hi) sfmt(%10.0f) title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate") /// 
					label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  l1_treat "TaxCreditEvent" ) 				

* generate counts for number of observations, firms and states
* A plain Poisson fit is run only to obtain e(sample); its coefficients are
* not used.
foreach y in  npat_500 nc50_oldfs_500 nc50_newfs {
    qui poisson `y'  l1rd_efr_hi if first_tax<=1991, cluster(stid)
    qui gen fullsample_a=e(sample)

    * Number of observations in the sample.
    * Fix: the original message used the loop macro v, which does not exist
    * in this loop, so the outcome name printed as blank.
    qui egen Nsample = sum(fullsample_a)
    qui sum Nsample
    di "N  for `y' : `r(max)'"

    * Number of firm-state pairs: largest group id among in-sample rows.
    * Fix: egen is now run quietly, as in the other count loops of this
    * file, so no missing-values note is printed between the counts.
    qui egen id_a=group(gvkey state) if fullsample_a==1
    qui sum(id_a)
    di "N firm#state for `y' : `r(max)'"

    * Number of firms.
    qui egen id_b=group(gvkey ) if fullsample_a==1
    qui sum(id_b)
    di "N firms for `y' : `r(max)'"

    drop Nsample fullsample_a id_a id_b
    di "Pseudo R2:" ra_`y'
}
		
********************************************************************************
* TABLE 4, PANEL B
* Prints the stored ATT estimates e1_<outcome> and, for each outcome, the
* number of observations, firm-state pairs and firms, plus the pseudo R2
* saved in scalar rb_<outcome>.
********************************************************************************

esttab e1_npat_500 e1_nc50_oldfs_500 e1_nc50_newfs, ///
    replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    sfmt(%10.0f) ///
    title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate") ///
    label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") ///
    coeflabels(l1rd_efr_hi "TaxCreditRate" l1_treat "TaxCreditEvent")

* generate counts for number of observations, firms and states
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly {
        * Plain Poisson on the cohort-by-lag specification; only e(sample)
        * is used afterwards.
        poisson `y' $varlistpostl ///
            if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
            cluster(stid)
        generate fullsample_a = e(sample)

        * Observations in the sample.
        egen Nsample = sum(fullsample_a)
        summarize Nsample
        local n_obs `r(max)'

        * Firm-state pairs: largest group id among in-sample rows.
        egen id_a = group(gvkey state) if fullsample_a==1
        summarize id_a
        local n_pairs `r(max)'

        * Firms.
        egen id_b = group(gvkey) if fullsample_a==1
        summarize id_b
        local n_firms `r(max)'

        drop Nsample fullsample_a id_a id_b
    }
    display "Observations:"
    display "N  for `y': `n_obs'"
    display "N firm#state for `y': `n_pairs'"
    display "N firms for `y': `n_firms'"
    display "Pseudo R2 for `y':" rb_`y'
}


********************************************************************************
********************************************************************************
* Regressions for Appendix Table A12, panel a&b
* control for state-specific trends
*
* Stage 1 fits, per outcome, a PPML model with state, firm and state-specific
* linear year slopes on rows with year<=first_taxB or first_taxB==0, and keeps
* the saved slope variable __hdfe3__Slope1. Stage 2 re-estimates the rate
* regression with slope*(year-1977) as an offset.
capture drop st_coefall st_coefold st_coefnew

local outcomes npat_500 nc50_oldfs_500 nc50_newfs
local slopes   st_coefall st_coefold st_coefnew
local tags     a2 b2 c2

* stage 1: generate state-specific trends
forvalues i = 1/3 {
    local y     : word `i' of `outcomes'
    local slope : word `i' of `slopes'

    * Remove the slope variable left behind by any previous savefe call.
    capture drop __hdfe3__Slope1
    quietly ppmlhdfe `y' if (year<=first_taxB  | first_taxB==0) & first_tax<=1991, ///
        absorb(stid fid c.year#stid, savefe) cluster(stid) keepsingle separation(ir)

    * Spread the estimated slope to every row of the state.
    egen `slope' = max(__hdfe3__Slope1), by(stid)
}
capture drop __hdfe3__Slope1

* Offset variables st_<outcome> = slope * (year - 1977).
capture drop st_npat_500 st_nc50_oldfs_500 st_nc50_newfs
forvalues i = 1/3 {
    local y     : word `i' of `outcomes'
    local slope : word `i' of `slopes'
    generate st_`y' = `slope'*(year-1977)
}

* stage 2: include offset
* Estimates are stored as a2 b2 c2; pseudo R2 overwrites scalar ra_<outcome>.
forvalues i = 1/3 {
    local y   : word `i' of `outcomes'
    local tag : word `i' of `tags'
    quietly ppmlhdfe `y' l1rd_efr_hi if first_tax<=1991, ///
        absorb(stid fid year) offset(st_`y') cluster(stid) separation(ir)
    estimates store `tag'
    scalar ra_`y' = e(r2_p)
}
			
				
********************************************************************************
* wooldridge estimator with state-trends
* calculate overall time-averaged treatment effect
* Same aggregation as for Table 4 panel B, but the model absorbs stid, fid
* and year separately and uses st_<outcome> as an offset. Results are stored
* as e2_<outcome>; pseudo R2 goes to scalar rb_<outcome>.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly ppmlhdfe `y' $varlistpostl ///
        if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
        absorb(stid fid year) offset(st_`y') vce(cluster stid) keepsingle separation(ir)

    scalar rb_`y' = e(r2_p)
    capture drop wsample
    generate wsample = e(sample)

    quietly {
        * Denominator: post-cohort rows of ever-treated cohorts in the sample.
        count if wsample==1 & year>$cohort2 & $cohort2!=0
        local n_post = r(N)

        * Cohort values present among those rows (same list for every lag).
        levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(cohorts)

        * Build the weighted sum as text; it is only evaluated by nlcom below.
        local att_expr 0
        foreach g in $vpostl {
            foreach c of local cohorts {
                count if wsample==1 & `g'==1 & $cohort2==`c'
                local n_cell = r(N)
                local att_expr `att_expr' + _b[`c'.$cohort2#c.`g']*`n_cell' /`n_post'
            }
        }
    }
    nlcom (att: `att_expr'), post
    estimates store e2_`y'
}

********************************************************************************
* TABLE A12, PANEL A
* Prints the stored state-trend rate regressions a2 b2 c2 and, for each
* outcome, the number of observations, firm-state pairs and firms, plus the
* pseudo R2 saved in scalar ra_<outcome>.
********************************************************************************
					
esttab  a2 b2 c2 , replace se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					keep(l1rd_efr_hi) sfmt(%10.0f) title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  l1_treat "TaxCreditEvent" ) 				
* generate counts for number of observations, firms and states
foreach y in  npat_500 nc50_oldfs_500 nc50_newfs {
    qui {
        * Plain Poisson fit used only for e(sample). The offset variable is
        * entered as a regressor, so rows where it is missing are excluded.
        poisson `y'  l1rd_efr_hi st_`y' if first_tax<=1991, cluster(stid)
        * Fix: removed the redundant qui prefix inside this quiet block.
        gen fullsample_a=e(sample)
        egen Nsample = sum(fullsample_a)
        sum Nsample
    }
    di "Observations:"
    di "N  for `y': `r(max)'"

    * Firm-state pairs: largest group id among in-sample rows.
    * Fix: egen is now run quietly, as in the other count loops of this
    * file, so no missing-values note is printed between the counts.
    qui egen id_a=group(gvkey state) if fullsample_a==1
    qui sum(id_a)
    di "N firm#state for `y': `r(max)'"

    * Firms.
    qui egen id_b=group(gvkey ) if fullsample_a==1
    qui sum(id_b)
    di "N firms for `y': `r(max)'"

    drop Nsample fullsample_a id_a id_b
    di "Pseudo R2 for `y':" ra_`y'
}
	
	
		
					
********************************************************************************
* TABLE A12, PANEL B
* Prints the stored ATT estimates e2_<outcome> and, for each outcome, the
* number of observations, firm-state pairs and firms, plus the pseudo R2
* saved in scalar rb_<outcome>.
********************************************************************************

esttab e2_npat_500 e2_nc50_oldfs_500 e2_nc50_newfs, ///
    replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    sfmt(%10.0f) ///
    title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate") ///
    label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") ///
    coeflabels(l1rd_efr_hi "TaxCreditRate" l1_treat "TaxCreditEvent")

* generate counts for number of observations, firms and states
* NOTE(review): the Poisson fit below does not involve the offset variable,
* so rows with a missing offset are not excluded from these counts - confirm
* that matches the estimation sample of the e2 models.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly {
        * Plain Poisson on the cohort-by-lag specification; only e(sample)
        * is used afterwards.
        poisson `y' $varlistpostl ///
            if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
            cluster(stid)
        generate fullsample_a = e(sample)

        * Observations in the sample.
        egen Nsample = sum(fullsample_a)
        summarize Nsample
        local n_obs `r(max)'

        * Firm-state pairs: largest group id among in-sample rows.
        egen id_a = group(gvkey state) if fullsample_a==1
        summarize id_a
        local n_pairs `r(max)'

        * Firms.
        egen id_b = group(gvkey) if fullsample_a==1
        summarize id_b
        local n_firms `r(max)'

        drop Nsample fullsample_a id_a id_b
    }
    display "N  for `y': `n_obs'"
    display "N firm#state for `y': `n_pairs'"
    display "N firms for `y': `n_firms'"
    display "Pseudo R2 for `y':" rb_`y'
}
				

********************************************************************************
* Regressions for Appendix Table A12, panel c&d
*
* Stage 1 fits, per outcome, a PPML model with state, firm and firm-specific
* linear year slopes on rows with year<=first_taxB or first_taxB==0, and keeps
* the saved slope variable __hdfe3__Slope1. Stage 2 re-estimates the rate
* regression with slope*(year-1977) as an offset.

capture drop st_coefall st_coefold st_coefnew

* stage 1: generate firm-specific trends
* NOTE(review): unlike the state-trend stage 1 earlier in this file, these
* three calls are not run quietly and do not pass separation(ir) - confirm
* that is intended. They are left unchanged here.
capture drop __hdfe3__Slope1					
 ppmlhdfe npat_500 if (year<=first_taxB  | first_taxB==0) & first_tax<=1991, absorb(stid fid c.year#fid, savefe) cluster(stid) keepsingle
* Spread the estimated slope to every row of the firm.
egen st_coefall = max(__hdfe3__Slope1), by(fid)					
capture drop __hdfe3__Slope1					
 ppmlhdfe nc50_oldfs_500 if (year<=first_taxB  | first_taxB==0) & first_tax<=1991, absorb(stid fid c.year#fid, savefe) cluster(stid) keepsingle 
egen st_coefold = max(__hdfe3__Slope1), by(fid)					
capture drop __hdfe3__Slope1					
 ppmlhdfe nc50_newfs if (year<=first_taxB  | first_taxB==0) & first_tax<=1991, absorb(stid fid c.year#fid, savefe) cluster(stid) keepsingle 
egen st_coefnew = max(__hdfe3__Slope1), by(fid)					
capture drop __hdfe3__Slope1					
capture drop st_npat_500 st_nc50_oldfs_500 st_nc50_newfs

* Offset variables st_<outcome> = slope * (year - 1977).
gen st_npat_500  = st_coefall*(year-1977)
gen st_nc50_oldfs_500 = st_coefold*(year-1977)
gen st_nc50_newfs = st_coefnew*(year-1977)


* stage 2: include offset
* Fix: the pseudo R2 of each firm-trend model is now saved in ra_<outcome>.
* Previously these scalars were not refreshed here, so the Panel C output
* further down printed the values left over from the state-trend models.
qui ppmlhdfe npat_500 l1rd_efr_hi if first_tax<=1991, absorb(stid fid year) offset(st_npat_500) cluster(stid) separation(ir)
est store a3
scalar ra_npat_500 = e(r2_p)
qui ppmlhdfe nc50_oldfs_500 l1rd_efr_hi if first_tax<=1991, absorb(stid fid year) offset(st_nc50_oldfs_500) cluster(stid) separation(ir)
est store b3
scalar ra_nc50_oldfs_500 = e(r2_p)
qui ppmlhdfe nc50_newfs l1rd_efr_hi if first_tax<=1991, absorb(stid fid year) offset(st_nc50_newfs) cluster(stid) separation(ir)
est store c3					
scalar ra_nc50_newfs = e(r2_p)
					

* wooldridge estimator with firm trends
* calculate overall time-averaged treatment effect
* Same aggregation as for Table 4 panel B, but the model absorbs stid, fid
* and year separately and uses st_<outcome> as an offset. Results are stored
* as e3_<outcome>; pseudo R2 goes to scalar rb_<outcome>.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly ppmlhdfe `y' $varlistpostl ///
        if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
        absorb(stid fid year) offset(st_`y') vce(cluster stid) keepsingle separation(ir)

    scalar rb_`y' = e(r2_p)
    capture drop wsample
    generate wsample = e(sample)

    quietly {
        * Denominator: post-cohort rows of ever-treated cohorts in the sample.
        count if wsample==1 & year>$cohort2 & $cohort2!=0
        local n_post = r(N)

        * Cohort values present among those rows (same list for every lag).
        levelsof $cohort2 if wsample==1 & year>$cohort2 & $cohort2!=0, local(cohorts)

        * Build the weighted sum as text; it is only evaluated by nlcom below.
        local att_expr 0
        foreach g in $vpostl {
            foreach c of local cohorts {
                count if wsample==1 & `g'==1 & $cohort2==`c'
                local n_cell = r(N)
                local att_expr `att_expr' + _b[`c'.$cohort2#c.`g']*`n_cell' /`n_post'
            }
        }
    }
    nlcom (att: `att_expr'), post
    estimates store e3_`y'
}


********************************************************************************
* TABLE A12, PANEL C
* Prints the stored firm-trend rate regressions a3 b3 c3 and, for each
* outcome, the number of observations, firm-state pairs and firms, plus the
* value of scalar ra_<outcome>.
********************************************************************************

esttab a3 b3 c3, ///
    replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    keep(l1rd_efr_hi) sfmt(%10.0f) ///
    title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate") ///
    label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") ///
    coeflabels(l1rd_efr_hi "TaxCreditRate" l1_treat "TaxCreditEvent")

* generate counts for number of observations, firms and states
* NOTE(review): the Poisson fit below does not involve the offset variable,
* whereas the corresponding loop for Panel A of this table enters it as a
* regressor - confirm which sample definition is intended.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly {
        * Plain Poisson on the rate specification; only e(sample) is used.
        poisson `y' l1rd_efr_hi if first_tax<=1991, cluster(stid)
        generate fullsample_a = e(sample)

        * Observations in the sample.
        egen Nsample = sum(fullsample_a)
        summarize Nsample
        local n_obs `r(max)'

        * Firm-state pairs: largest group id among in-sample rows.
        egen id_a = group(gvkey state) if fullsample_a==1
        summarize id_a
        local n_pairs `r(max)'

        * Firms.
        egen id_b = group(gvkey) if fullsample_a==1
        summarize id_b
        local n_firms `r(max)'

        drop Nsample fullsample_a id_a id_b
    }
    display "Observations:"
    display "N  for `y': `n_obs'"
    display "N firm#state for `y': `n_pairs'"
    display "N firms for `y': `n_firms'"
    display "Pseudo R2 for `y':" ra_`y'
}
					
			
********************************************************************************
* TABLE A12, PANEL D
* Prints the stored ATT estimates e3_<outcome> and, for each outcome, the
* number of observations, firm-state pairs and firms, plus the pseudo R2
* saved in scalar rb_<outcome>. Then closes the log and ends the do-file.
********************************************************************************
esttab e3_npat_500 e3_nc50_oldfs_500 e3_nc50_newfs, ///
    replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    sfmt(%10.0f) ///
    title("Tax credits, and patenting, firm-state level, Poisson / exponential mean regression, tax credit rate") ///
    label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") ///
    coeflabels(l1rd_efr_hi "TaxCreditRate" l1_treat "TaxCreditEvent")

* NOTE(review): the Poisson fit below does not involve the offset variable,
* so rows with a missing offset are not excluded from these counts - confirm
* that matches the estimation sample of the e3 models.
foreach y in npat_500 nc50_oldfs_500 nc50_newfs {
    quietly {
        * Plain Poisson on the cohort-by-lag specification; only e(sample)
        * is used afterwards.
        poisson `y' $varlistpostl ///
            if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), ///
            cluster(stid)
        generate fullsample_a = e(sample)

        * Observations in the sample.
        egen Nsample = sum(fullsample_a)
        summarize Nsample
        local n_obs `r(max)'

        * Firm-state pairs: largest group id among in-sample rows.
        egen id_a = group(gvkey state) if fullsample_a==1
        summarize id_a
        local n_pairs `r(max)'

        * Firms.
        egen id_b = group(gvkey) if fullsample_a==1
        summarize id_b
        local n_firms `r(max)'

        drop Nsample fullsample_a id_a id_b
    }
    display "Observations:"
    display "N  for `y': `n_obs'"
    display "N firm#state for `y': `n_pairs'"
    display "N firms for `y': `n_firms'"
    display "Pseudo R2 for `y':" rb_`y'
}

* Close the log opened at the top of the file and stop.
log close

exit
